%%%% This script matches the Uber trips geolocalized by lattitude and
%%%% longitude  with the Manzanas (census tract) for the 
%%%% from the Geostatistical Framework of Mexico (Marco estadistio de
%%%% Mexixo of 2018), which are in Labert.
%%%% The uber trips are in files : coordinates_d.csv and coordinates_o.csv for origin and destination respectively 
%%%% The Census tracts (manzanas) are in censo_georeference.csv
%%%% Step 1 converts the Uber trips into Lambert coordinates of the same types used in Mexco's Geostatistical Framework
%%%% Step 2 matches the trips into the centroid of the census tractc of the DF and EdoMex. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear all;
close all;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%% Step 1 
%%%% this script converts latitutde and longitude coordinates into Lambert
%%%% conformal conic section coordinates in the plane using ellispoidial
%%%% deformation as GRS80 method
%%%% coordinates are given as ITRF92
%%%% the parameters are already chosen for Meixco geolocalized census data
%%% define the particular projection

a  = 6378137.0 ; %% earth radius, in meters
f = 1/298.257222101 ;
e = sqrt(2*f  -f^2) ;
%%% Reference latitude phi_0 and longitude (lambda_0)
lambda_0 = deg2rad(-102);
phi_0    = deg2rad(12);
%%% Standard paralells latitudes
phi_S = deg2rad(17.5);
phi_N = deg2rad(29.5);
%%% Falso Northing: 
N_0 = 0;
%%% Falso Easting:
E_0 = 2500000;

phi = phi_S;
w1 = sqrt(1 - e^2 * sin(phi)^2 ) ;
phi = phi_N;
w2 = sqrt(1 - e^2 * sin(phi)^2 ) ;
m1 = cos(phi_S)/w1;
m2 = cos(phi_N)/w2;
phi = phi_0 ;
t0 = sqrt( (1-sin(phi))/(1+sin(phi)) *((1+e*sin(phi))/(1-e*sin(phi)) )^e ) ;
phi =phi_S;
t1 = sqrt( (1-sin(phi))/(1+sin(phi)) *((1+e*sin(phi))/(1-e*sin(phi)) )^e ) ;
phi=phi_N;
t2 = sqrt( (1-sin(phi))/(1+sin(phi)) *((1+e*sin(phi))/(1-e*sin(phi)) )^e ) ;
n = (log(m1)-log(m2))/ (log(t1) -log(t2)); %sin(phi_0);
F = m1/(n*t1^n);
Rb = a * F * t0^n ;

coordinates_o = readtable('coordinates_d.csv');
phi_eval    = deg2rad(coordinates_o.lat);
lambda_eval = deg2rad(coordinates_o.lng);
id = coordinates_o.id ;
tic;
phi = phi_eval;
t = sqrt( (1-sin(phi))./(1+sin(phi)) .* ((1+e*sin(phi))./(1-e*sin(phi)) ).^e ) ;
m = cos(phi)./t;
R = a * F * t.^n ;
gamma = (lambda_eval-lambda_0)*n;
E = R .* sin(gamma) + E_0;
N = Rb - R .* cos(gamma) + N_0 ;
toc;
lambert=[coordinates_o.id,N,E];
Table_temp = table(id,E,N,'VariableNames',{'Trip_id','East_x','North_y'});


! rm coordinates_d_lambert.csv ;
writetable(Table_temp,'coordinates_d_lambert.csv');
clear Table_temp ;
clear coordinates_o ;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%% Step 2 

N_Trips = length(id);
Trips_lat_long = zeros(N_Trips,3);
Trips_lat_long(:,1) = id;
Trips_lat_long(:,2) = E;
Trips_lat_long(:,3) = N;

%%% Reads manzanas
censo = readtable('censo_georeference.csv') ;
ID_manzana = censo.ID;
N_Census_tracts = length(ID_manzana) ;
Census_tract_lat_long = zeros(N_Census_tracts,3);
Census_tract_lat_long(:,1) = ID_manzana;
Census_tract_lat_long(:,2) = censo.x_centroid;
Census_tract_lat_long(:,3) = censo.y_centroid;

clear censo ; 
match_trip_manzana_dist = zeros(N_Trips,3);

min_dist_square = zeros(N_Trips,1);
index_min = zeros(N_Trips,1);

tic; 
for i=1:N_Trips
    [min_dist_square(i) index_min(i) ] = ...
    min( (Trips_lat_long(i,2)-Census_tract_lat_long(:,2)).^2 + (Trips_lat_long(i,3)-Census_tract_lat_long (:,3)).^2) ;
    j = index_min(i) ;
    match_trip_manzana_dist(i,1) = Trips_lat_long(i,1);
    match_trip_manzana_dist(i,2) = Census_tract_lat_long(j,1);
end
min_dist = sqrt(min_dist_square);
match_trip_manzana_dist(:,3) = min_dist;
toc;

Table_temp_match = table(match_trip_manzana_dist(:,1),match_trip_manzana_dist(:,2),match_trip_manzana_dist(:,3),'VariableNames',{'Trip_id','Census_tract_id','Distance_mts'});
! rm matches_d_trips_manzanas.csv ;
writetable(Table_temp_match,'matches_d_trips_manzanas.csv');

average_d = mean(match_trip_manzana_dist(:,3));
median_d = median(match_trip_manzana_dist(:,3));

figure(1);
hist(min(match_trip_manzana_dist(:,3),500),300);
xl = xlabel('Meters from centroid of census tract to trip');
set(xl,'Fontsize',18,'Interpreter','Latex');
ti=title('Distribution of distance of trips to census tract, top-coded at 500m');
set(ti,'Fontsize',18,'Interpreter','Latex');
colormap(gray);
saveas(gcf,'FigureC1a','epsc')

save('matches_trips_manzanas_d.mat','match_trip_manzana_dist');

trip_by_manzana = grpstats(Table_temp_match,'Census_tract_id',{'mean','max','min','std'},'DataVars',{'Distance_mts'});

average_ntrips_bym = mean(trip_by_manzana.GroupCount);
median_ntrips__bym = median(trip_by_manzana.GroupCount);

figure(2); 
hist(min(trip_by_manzana.GroupCount,1000),200);
xl = xlabel('Number of trips in census tract');
set(xl,'Fontsize',18,'Interpreter','Latex');
ti=title('Distribution of number of trips per census tract , top-coded at 1000');
set(ti,'Fontsize',18,'Interpreter','Latex');
colormap(gray);
saveas(gcf,'FigureC1b','epsc')

  